DV2 - Homework 2.

Son Nam Nguyen

22 January 2022

# Install pacman on first use. requireNamespace() only checks whether the
# package is available; it does not attach it, which is sufficient here
# because p_load() is always called with the explicit pacman:: prefix.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# Load every package used by the plots and the geocoding section below.
pacman::p_load(ggplot2, data.table, gganimate, tidygeocoder, ggmap, scatterpie)

# Read the flats dataset and wrap it in a data.table so the `[i, j, by]`
# syntax used throughout the script is available.
df <- data.table(
  readRDS("flats.rds")
)

Task 1

# Histogram of flat areas, split into 40 bins.
ggplot(data = df) +
  geom_histogram(
    mapping = aes(x = Area),
    bins = 40,
    color = "white",
    fill = "#006D77"
  ) +
  theme_bw() +
  labs(title = "Distribution of the Area of flats (m2)")

Task 2

# Overlaid price densities, one per flat condition. Rows without a recorded
# Condition are dropped before plotting.
ggplot(
  data = df[!is.na(Condition)],
  mapping = aes(x = Price, fill = Condition)
) +
  geom_density(alpha = 0.4) +
  theme_bw() +
  labs(title = "Price distribution for flats in different conditions")

Task 3

# Price against area for every flat with a known Condition, with one linear
# fit per condition drawn on top of the points.
ggplot(df[!is.na(Condition)], aes(x = Area, y = Price)) +
  geom_point(alpha = 0.6) +
  geom_smooth(
    aes(color = Condition),
    method = "lm",
    formula = y ~ x, # the default for "lm"; stated to avoid the message
    se = FALSE       # FALSE rather than F, which can be reassigned
  ) +
  labs(title = "How the condition of the flats affects price to area") +
  theme_bw()

Task 4

# Bar chart of the mean flat price per district. The aggregation is done with
# data.table before the data reaches ggplot.
ggplot(
  data = df[, .(avg_price = mean(Price)), by = District],
  mapping = aes(x = as.factor(District), y = avg_price)
) +
  geom_col(fill = "#006D77") +
  labs(
    y = "Average price",
    x = "District"
  ) +
  theme_bw()

Task 5

# Box plot of flat prices for each district (District is treated as a
# categorical axis).
ggplot(df, aes(x = as.factor(District), y = Price)) +
  geom_boxplot(color = "#006D77", fill = "#66B7B0", alpha = 0.6) +
  labs(x = "District") +
  theme_bw()

Task 6

# Violin plot of the price distribution within each district.
ggplot(
  data = df,
  mapping = aes(x = as.factor(District), y = Price)
) +
  geom_violin(fill = "#66B7B0") +
  theme_bw() +
  labs(x = "District")

Task 7

# Animated price histogram that steps through the districts one at a time.
# The title and subtitle are glue templates: gganimate fills in
# `closest_state` (the district currently shown) for every frame, and the
# subtitle expressions recompute the flat count and mean price for it.
ggplot(df, aes(x = Price)) +
  # bins = 30 is geom_histogram's default; stating it avoids the message.
  geom_histogram(fill = "#006D77", color = "#66B7B0", bins = 30) +
  theme_bw() +
  transition_states(District) +
  labs(
    title = "District {closest_state}",
    subtitle = "Number of flats: {nrow(subset(df, District == closest_state))} \n Mean price: {round(mean(subset(df, District == closest_state)$Price))} Ft"
  )

Bonus exercises

# Build one address string per flat from its district number, in the form
# "<District>. Kerulet, Budapest, Hungary".
df[, address := paste(
  paste0(District, ". Kerulet"),
  "Budapest",
  "Hungary",
  sep = ", "
)]

# Split the table into one chunk per district so geocoding runs in batches.
district <- df$District
splitdf <- split(df, district)

# Geocode the `address` column of a single district chunk.
geocode_chunk <- function(x) {
  tidygeocoder::geocode(x, "address")
}

# Geocode every chunk and stack the results back into one table.
geocodes <- rbindlist(lapply(splitdf, geocode_chunk))

# Bounding box (in degrees) used for the Budapest base map.
bbox <- c(bottom = 47.38, left = 18.92, top = 47.62, right = 19.30)

# Download the background and label tile layers separately so the labels can
# be drawn on top of the background. `color` is given as a single value
# instead of the full vector of choices.
# NOTE(review): newer ggmap releases appear to serve Stamen styles through
# get_stadiamap() with an API key -- confirm get_stamenmap() still works with
# the installed ggmap version.
map_background <- get_stamenmap(
  bbox,
  zoom = 12,
  maptype = "toner-background",
  color = "color"
)
map_labels <- get_stamenmap(
  bbox,
  zoom = 12,
  maptype = "toner-labels",
  color = "color"
)

# Base map reused by both figures below: background with the label layer inset.
map <- ggmap(map_background) + inset_ggmap(map_labels)
# First figure: one orange point per geocoded location, sized by the number
# of flats found there. Legend and axis decorations are hidden.
map +
  geom_point(
    data = geocodes[, .N, by = c("lat", "long")],
    mapping = aes(x = long, y = lat, size = N),
    color = "orange"
  ) +
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    legend.position = "none"
  )

# Second figure: one pie per location showing the mix of comfort levels.

# Count flats per location, district and comfort level. District is part of
# the grouping key so each combination yields exactly one row; returning it
# from `j` instead would repeat the row once per flat.
df1 <- geocodes[
  !is.na(Comfort_lev),
  .N,
  by = .(lat, long, District, Comfort_lev)
]

# Reshape to one row per location with one count column per comfort level.
# Combinations with no flats become 0 rather than NA.
df2 <- dcast(
  df1,
  lat + long + District ~ Comfort_lev,
  value.var = "N",
  fill = 0L
)

# Comfort levels in ascending order; used for both the column order and the
# pie slices.
comfort_levels <- c("very low", "low", "average", "high", "very high", "luxury")
setcolorder(df2, comfort_levels)

map +
  geom_scatterpie(
    data = df2,
    aes(long, lat),
    color = NA,
    alpha = 0.7,
    cols = comfort_levels,
    pie_scale = 2
  ) +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  ) +
  guides(fill = guide_legend(nrow = 1, title = "Comfort level"))